from pyspark import SparkConf, SparkContext
import os

# Point PySpark worker processes at the local Python interpreter.
os.environ['PYSPARK_PYTHON'] = 'D:/PYTHON/python3.10/python.exe'

# Build a local-mode Spark context using all available cores.
spark_conf = SparkConf().setMaster('local[*]').setAppName('my_test_spark')
spark_ctx = SparkContext(conf=spark_conf)

# Demo: sum values per key with reduceByKey on a small pair RDD.
pair_rdd = spark_ctx.parallelize([('狗', 10), ('狗', 20), ('猫', 6), ('猫', 8), ('猫', 8)])
summed_rdd = pair_rdd.reduceByKey(lambda left, right: left + right)
print(summed_rdd.collect())

spark_ctx.stop()
